# Librarieslibrary(tidyverse)library(gmRi)library(scales)library(gt)library(patchwork)library(gtsummary)library(gt)library(sizeSpectra)library(gganimate)library(glue)conflicted::conflict_prefer("select", "dplyr")conflicted::conflict_prefer("filter", "dplyr")# Processing functionssource(here::here("Code/R/Processing_Functions.R"))# ggplot themetheme_set(theme_gmri(axis.line.y =element_line(),axis.ticks.y =element_line(), rect =element_rect(fill ="white", color =NA),panel.grid.major.y =element_blank(),strip.text.y =element_text(angle =0),axis.text.x =element_text(size =8),axis.text.y =element_text(size =8),legend.position ="bottom"))# labels for factor levelsarea_levels <-c("GoM", "GB", "SNE", "MAB")area_levels_long <-c("Gulf of Maine", "Georges Bank", "Southern New England", "Mid-Atlantic Bight")area_levels_all <-c("Northeast Shelf", area_levels)area_levels_long_all <-c("Northeast Shelf", area_levels_long)# table to join for swapping shorthand for long-hand namesarea_df <-data.frame(area =c("Scotian Shelf", "Gulf of Maine", "Georges Bank", "Southern New England", "Mid-Atlantic Bight", "Northeast Shelf"),survey_area =c("SS", "GoM", "GB", "SNE", "MAB", "Northeast Shelf"),area_titles =c("Scotian Shelf", "Gulf of Maine", "Georges Bank", "Southern New England", "Mid-Atlantic Bight", "Northeast Shelf"))# Degree symboldeg_c <-"\u00b0C"
Determining Spectra Minimum Sizes
Size spectra typically focus on the descending slope of the size distribution. This doc covers how that minima is selected and the process for doing so, and then the consequence of shifting the minima of the bounded distribution.
Code
# Data used for Wigley estimateswigley_in <-read_csv(here::here("Data/processed/wigley_species_trawl_data.csv")) %>%left_join(area_df)
The general idea is to bin the individual abundances by log2 size-bins, then determine the peak.
The following functions will aid in the labeling of log2 bins:
Code
# Broad Distribution#log2 bins for easy-of-access#' @title Build Log 2 Bin Structure Dataframe#' #' @description Used to build a dataframe containing equally spaced log2 bins for#' size spectra analysis. Contains details on the left and right limits, midpoint, bin width, #' and a text label for the bins. log2bin number ascends with increasing size for eeasy plotting.#'#' @param log2_min #' @param log2_max #' @param log2_increment #'#' @return#' @export#'#' @examplesdefine_log2_bins <-function(log2_min =0, log2_max =13, log2_increment =1){# How many bins n_bins <-length(seq(log2_max, log2_min + log2_increment, by =-log2_increment))# Build Equally spaced log2 bin df log2_bin_structure <-data.frame("log2_bins"=as.character(seq(n_bins, 1, by =-1)),"left_lim"=seq(log2_max - log2_increment, log2_min, by =-log2_increment),"right_lim"=seq(log2_max, log2_min + log2_increment, by =-log2_increment)) %>%mutate(bin_label =str_c(round(2^left_lim, 3), " - ", round(2^right_lim, 3), "g"),bin_width =2^right_lim -2^left_lim,bin_midpoint = (2^right_lim +2^left_lim) /2) %>%arrange(left_lim)return(log2_bin_structure)}#' @title Assign Manual log2 Bodymass Bins - By weight#'#' @description Manually assign log2 bins based on individual length-weight bodymass #' in increments of 1 on the log2 scale. Returns data with bins assigned based on individual#' length-weight biomass#' #' Uses maximum weight, and its corresponding bin as the limit.#'#' @param wmin_grams Catch data prepared for mle calculation, use prep_wmin_wmax#' @param log2_increment Equally spaced increments to use for log 2 bin sizes. Default = 0.5.#'#' @return#' @export#'#' @examplesassign_log2_bins <-function(wmin_grams, log2_increment =1){#### 1. Set up bodymass bins# filter missing weights size_data <- wmin_grams %>%filter(wmin_g >0,is.na(wmin_g) ==FALSE, wmax_g >0,is.na(wmax_g) ==FALSE)# Get bodymass on log2() scale size_data$log2_weight <-log2(size_data$ind_weight_g)# Set up the bins - Pull min and max weights from data available#min_bin <- floor(min(size_data$log2_weight)) min_bin <-0 max_bin <-ceiling(max(size_data$log2_weight))# Build a bin key, could be used to clean up the incremental assignment or for apply style functions log2_bin_structure <-define_log2_bins(log2_min = min_bin, log2_max = max_bin, log2_increment = log2_increment)# Loop through bins to assign the bin details to the data log2_assigned <- log2_bin_structure %>%split(.$log2_bins) %>%map_dfr(function(log2_bin){# limits and labels l_lim <- log2_bin$left_lim r_lim <- log2_bin$right_lim bin_num <-as.character(log2_bin$log2_bin)# assign the label to the appropriate bodymasses size_data %>%mutate(log2_bins =ifelse( between(log2_weight, l_lim, r_lim), bin_num, NA),log2_bins =as.character(log2_bins)) %>%drop_na(log2_bins) })# Join in the size bins log2_assigned <-left_join(log2_assigned, log2_bin_structure, by ="log2_bins")# return the data with the bins assignedreturn(log2_assigned)}#' @title Calculate Normalized and De-Normalized Abundances#'#' @description For binned size spectra estimation we use the stratified abundance divided by the#' bin widths (normalized size spectra). Another way to present the data is to de-normalize, which #' takes those values and multiplies them by the mid-point of the log-scale bins.#' #' min/max & bin_increments are used to enforce the presence of a size bin in the event that #' there is no abundance. This is done for comparing across different groups/areas that should #' conceivably have the same size range sampled.#'#' @param log2_assigned size data containing the bin assignments to use#' @param min_log2_bin Minimum 2^x value for the size spectra being measured (>=)#' @param max_log2_bin Maximum 2^x value for the size spectra being measured (<)#' @param bin_increment The bin-width on log scale that separates each bin#' @param ... Additional grouping factors with which to aggregate on besides the size bins themselves#'#' @return#' @export#'#' @examplesaggregate_log2_bins <-function( log2_assigned, min_log2_bin =0, max_log2_bin =13, bin_increment =1, ...){# Full Possible Bin Structure# Fills in any gaps log2_bin_structure <-define_log2_bins(log2_min = min_log2_bin, log2_max = max_log2_bin, log2_increment = bin_increment)# Capture all the group levels with a cheeky expand()if(missing(...) ==FALSE){ log2_bin_structure <- log2_bin_structure %>% tidyr::expand(left_lim, distinct(log2_assigned, ...)) %>%full_join(log2_bin_structure) }# Get bin breaks log2_breaks <-sort(unique(c(log2_bin_structure$left_lim, log2_bin_structure$right_lim)))# Get Totals for bodymass and abundances log2_aggregates <- log2_assigned %>%group_by(left_lim, ...) %>%summarise(abundance =sum(numlen_adj, na.rm = T),weight_g =sum(wmin_g, na.rm = T),.groups ="drop")# Join back in what the limits and labels are# The defined bins and their labels enforce the size limits log2_prepped <-left_join(x = log2_bin_structure, y = log2_aggregates)#### Fill Gaps with Zero's?? # This ensures that any size bin that is intended to be in use is actually used log2_prepped <- log2_prepped %>%mutate(abundance =ifelse(is.na(abundance), 1, abundance),weight_g =ifelse(is.na(weight_g), 1, weight_g))#### normalize abundances using the bin widths log2_prepped <- log2_prepped %>%mutate(normalized_abund = abundance / bin_width,normalized_biom = weight_g / bin_width,# # Remove Bins Where Normalized Biomass < 0? No!# normalized_abund = ifelse(normalized_abund < 2^0, NA, normalized_abund),# norm_strat_abund = ifelse(norm_strat_abund < 2^0, NA, norm_strat_abund) )# Add de-normalized abundances (abundance * bin midpoint) log2_prepped <- log2_prepped %>%mutate(denorm_abund = normalized_abund * bin_midpoint,denorm_biom = normalized_biom * bin_midpoint)# Return the aggregationsreturn(log2_prepped)}
I can’t auto-locate the tallest bin within all of that geom_histogram plot code, so it will be easier to do it outside and animate them using geom_col.
Here are all of them as an animation:
Code
# Get the plotting summarieswig_l2_aggs <- wig_l2 %>%unite("groupvar", area, season, est_year, sep ="XX") %>%split(.$groupvar) %>%map_dfr(function(x){# Get aggregates l2_aggs <-aggregate_log2_bins(x, bin_increment =1)# Locate Tallest tallest <- l2_aggs %>%arrange(desc(normalized_abund)) %>%slice(1) %>%pull(left_lim)# Add info back to aggregates l2_aggs <- l2_aggs %>%mutate(is_tallest =if_else(left_lim == tallest, T, F))# Returnreturn(l2_aggs)}, .id ="groupvar") %>%separate(groupvar, into =c("area", "season", "est_year"), sep ="XX") %>%mutate(area =factor(area, levels = area_levels_long_all),season =factor(season, levels =c("Spring", "Fall")))
Code
# # ggplot with animation# animated_plot <- ggplot(wig_l2_aggs) +# geom_col(# aes(# x = left_lim,# y = normalized_abund,# fill = season,# color = is_tallest),# alpha = 0.6,# linewidth = 1.5) +# scale_y_continuous(# trans = transform_log(base = 2),# labels = label_log(base = 2),# breaks = 2^seq(-10,16, 2)) +# scale_x_continuous(# labels = label_math(expr = 10^.x),# breaks = c(0:15)) +# scale_fill_gmri() +# scale_color_manual(values = c("transparent", "black")) +# facet_grid(area ~ season, scale = "free") +# labs(# y = "Normalized Abundance",# x = "Bodyweight (g)",# title = 'Year: {closest_state}', # Dynamic title for year# fill = "Season") +# transition_states(# est_year,# transition_length = 1,# state_length = 5) +# ease_aes('linear') # Smooth transition# # # Animate and save# animate(animated_plot, nframes = 300, fps = 10, width = 1000, height = 800)# anim_save(here::here("faceted_tallest_bin_spectra_wigley.gif"), animated_plot)# Show the GIFknitr::include_graphics(here::here("faceted_tallest_bin_spectra_wigley.gif"))
Bodymass spectra abundance peaks
Looking at Individual Years w/ Observable
I like having the ability to look at them one-by-one, but don’t want to make 800 plots or tabs. I’m using this exercise as an excuse to use observable for interactive selections.
Code
# Reformat years as a date - messes with slider# year_avgs <- mutate(year_avgs, est_year = as.Date(str_c(est_year, "-06-01")))# Define data to use for jsojs_define(data = wig_l2_aggs %>%arrange(est_year))# data = filter(wig_l2_aggs, normalized_abund > 1) %>% # arrange(est_year))
Code
viewof yearSlider = Inputs.range( [1970,2019],// If not pulling from the data {label:"Year:",value:1970,// If not pulling from the datastep:1} // Step size for numeric slider)// Create selections for season and area//viewof seasonSelect = Inputs.select(["Spring", "Fall"], { label: "Season" })viewof areaSelect = Inputs.select(["Northeast Shelf","Gulf of Maine","Georges Bank","Southern New England","Mid-Atlantic Bight"], { label:"Area" })// Filtering FunctionfilteredData =transpose(data).filter(function(d) {return yearSlider == d.est_year&& d.area=== areaSelect;})
Code
// Plot with FacetsPlot.plot({facet: {data: filteredData,x: d => d.area,// Columns: areay: d => d.season// Rows: season//y: { field: "season", domain: ["Spring", "Fall"] } // Custom row order - doesn't work },marks: [ Plot.barY(filteredData, { x:"left_lim",y:"normalized_abund",fill:"season",//stroke: "season",stroke: d => d.is_tallest?"black":"none",// Change stroke color for highlighted barsstrokeWidth: d => d.is_tallest?4:0// Thicker stroke for highlighted bars }) ],x: {label:"Body Mass (g)",tickFormat: d =>`2^${d}`// Format labels as powers of 2 },y: {label:"Abundance",type:"symlog",base:2 },style: {facetPadding:5 }})